library(Seurat)
## Attaching SeuratObject
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(stringr)
library(tibble)
library(patchwork)
First we load the sister pair DE tables and filter for:
absolute avg_log2FC > 0.5 (~41% increase)
p_val_adj < 0.01
DE_list <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_poly_sis_markers.rds")
for (i in seq(DE_list)) {
DE_list[[i]] <- DE_list[[i]] %>%
arrange(desc(avg_log2FC)) %>%
filter(abs(avg_log2FC) > 0.5) %>%
filter(p_val_adj < 0.01)
}
DE_table <- do.call(rbind, DE_list)
dim(DE_table)
## [1] 807 8
hist(abs(DE_list[[1]]$delta_pct), breaks = 20)
abline(v = 0.1, lty = "dashed", col = "red")
hist(abs(DE_list[[2]]$delta_pct), breaks = 20)
abline(v = 0.1, lty = "dashed", col = "red")
hist(abs(DE_list[[4]]$delta_pct), breaks = 20)
abline(v = 0.1, lty = "dashed", col = "red")
hist(abs(DE_list[[5]]$delta_pct), breaks = 20)
abline(v = 0.1, lty = "dashed", col = "red")
Now we filter the DE lists for absolute delta percentage > 0.1.
for (i in seq(DE_list)) {
DE_list[[i]] <- DE_list[[i]] %>%
filter(abs(delta_pct) > 0.1)
}
DE_table <- do.call(rbind, DE_list)
dim(DE_table)
## [1] 671 8
broad_order <- c("progenitors",
"FP",
"RP",
"FP/RP",
"neurons",
"OPC",
"MFOL",
"pericytes",
"microglia",
"blood",
"vasculature"
)
Load the integrated control and poly data.
int_path <- "Gg_ctrl_poly_int_seurat_250723"
my.se <- readRDS(paste0("~/spinal_cord_paper/data/", int_path, ".rds"))
annot_int <- read.csv(list.files("~/spinal_cord_paper/annotations",
pattern = str_remove(int_path, "_seurat_\\d{6}"),
full.names = TRUE))
if(length(table(annot_int$number)) != length(table(my.se$seurat_clusters))) {
stop("Number of clusters must be identical!")
}
# rename for left join
annot_int <- annot_int %>%
mutate(fine = paste(fine, number, sep = "_")) %>%
mutate(number = factor(number, levels = 1:nrow(annot_int))) %>%
rename(seurat_clusters = number)
ord_levels <- annot_int$fine[order(match(annot_int$broad, broad_order))]
# add cluster annotation to meta data
my.se@meta.data <- my.se@meta.data %>%
rownames_to_column("rowname") %>%
left_join(annot_int, by = "seurat_clusters") %>%
mutate(fine = factor(fine, levels = ord_levels)) %>%
mutate(seurat_clusters = factor(seurat_clusters, levels = str_extract(ord_levels, "\\d{1,2}$"))) %>%
column_to_rownames("rowname")
ctrl_poly_int_combined_labels <- readRDS("~/spinal_cord_paper/annotations/ctrl_poly_int_combined_labels.rds")
my.se <- AddMetaData(my.se, ctrl_poly_int_combined_labels)
DimPlot(
my.se,
group.by = "annot_sample",
reduction = "tsne",
label = TRUE,
repel = TRUE
) +
NoLegend()
Get the cluster order from the spearman correlation heatmap of the control and poly integrated data. Then we filter for the neuronal clusters only.
corr_heatmap <- readRDS("~/spinal_cord_paper/output/heatmap_spearman_ctrl_poly.rds")
#heatmap order
htmp_order <- data.frame("label" = corr_heatmap[["gtable"]]$grobs[[4]]$label) %>%
mutate(label = str_remove(label, "_int")) %>%
mutate(label_ordered = paste(str_sub(label,6 ,-1), str_sub(label, 1, 4), sep = "_"))
my.se@meta.data <- my.se@meta.data %>%
mutate(annot_sample = factor(annot_sample, levels = htmp_order$label_ordered))
Idents(my.se) <- "annot_sample"
# filter for the neuronal clusters
my.se <- subset(my.se, idents = htmp_order$label_ordered[grepl("neurons|MN|CSF", htmp_order$label_ordered)])
DimPlot(
my.se,
group.by = "annot_sample",
reduction = "tsne",
label = TRUE,
repel = TRUE
) +
NoLegend()
my.se@active.assay <- "RNA"
# Dotplot of sister pair makrers
pl_all <- modplots::mDotPlot2(my.se,
group.by = "annot_sample",
# reverse order of unique genes so number one is on top
features = rev(unique(DE_table$Gene.stable.ID)),
gnames = modplots::gnames,
cols = c("lightgrey", "black")) +
theme(axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5)) +
coord_flip()
##
pl_all
pdf("~/spinal_cord_paper/figures/Sister_pair_DE_dotplot.pdf", width = 15, height = 32)
pl_all
DE_table$Gene.name[duplicated(DE_table$Gene.stable.ID)]
## [1] "HES5" "MAP6" "GNG5"
## [4] "ST18" "GAD1" "FABP3"
## [7] "SYT1" "SLC32A1" "KIF5C"
## [10] "HMP19" "GALNT9" "VSTM2L"
## [13] "HINTW" "DNER" "CRABP-I"
## [16] "RELN" "PAX2" "NEUROD2"
## [19] "CHL1" "LHX1" "NRXN3"
## [22] "ENSGALG00000029521" "BHLHE22" "SPOCK1"
## [25] "SSTR1" "SLC32A1" "NCALD"
## [28] "ID2" "GRIK3" "GAD2"
## [31] "PTPRK" "GABRG3" "GAD1"
## [34] "RUNX1T1" "HPCAL1" "ZEB2"
## [37] "GALNT9" "ENSGALG00000013212" "MDK"
## [40] "ZFPM2" "RELN" "NEUROD6"
## [43] "CPLX1" "LAMP5" "WNT5A"
## [46] "HINTW" "SOX4" "DKK3"
## [49] "UNC13B" "ATP1B1" "GALNT17"
## [52] "RASD1" "ENSGALG00000051980" "PLXNA4"
## [55] "DACT2" "DISP3" "MVB12B"
## [58] "ENSGALG00000054223" "CNTN4" "ZNF423"
## [61] "CBLB" "FKBP1B" "CELF2"
## [64] "EPB41L4A" "PXYLP1" "ENSGALG00000023640"
## [67] "CNTN2" "MRPS6" "PPP3CA"
## [70] "NFIX" "NFIA" "SOX8"
## [73] "DRAXIN" "CRABP-I" "NHLH1"
## [76] "TAC1" "VSTM2L" "CPNE2"
## [79] "PRKCA"
# select top50 by log2FC
for (i in seq(DE_list)) {
DE_list[[i]] <- DE_list[[i]] %>%
slice_max(order_by = abs(avg_log2FC), n = 50) %>%
arrange(desc(avg_log2FC))
}
p1 <- modplots::mDotPlot2(my.se,
group.by = "annot_sample",
assay = "RNA",
# reverse order of DE genes so number one is on top
features = rev(DE_list[[1]]$Gene.stable.ID),
gnames = modplots::gnames,
cols = c("lightgrey", "black")) +
theme(axis.text.x = element_blank()) +
coord_flip() +
xlab(names(DE_list)[1]) +
ylab(element_blank())
p2 <- modplots::mDotPlot2(my.se,
group.by = "annot_sample",
assay = "RNA",
# reverse order of DE genes so number one is on top
features = rev(DE_list[[2]]$Gene.stable.ID),
gnames = modplots::gnames,
cols = c("lightgrey", "black")) +
theme(axis.text.x = element_blank()) +
coord_flip() +
xlab(names(DE_list)[2]) +
ylab(element_blank())
p3 <- modplots::mDotPlot2(my.se,
group.by = "annot_sample",
assay = "RNA",
# reverse order of DE genes so number one is on top
features = rev(DE_list[[5]]$Gene.stable.ID),
gnames = modplots::gnames,
cols = c("lightgrey", "black")) +
theme(axis.text.x = element_blank()) +
coord_flip() +
xlab(names(DE_list)[5]) +
ylab(element_blank())
p4 <- modplots::mDotPlot2(my.se,
group.by = "annot_sample",
assay = "RNA",
# reverse order of DE genes so number one is on top
features = rev(DE_list[[4]]$Gene.stable.ID),
gnames = modplots::gnames,
cols = c("lightgrey", "black")) +
theme(axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5)) +
coord_flip() +
xlab(names(DE_list)[4]) +
ylab(element_blank())
layout <- "CCDD
CC##"
pdf("~/spinal_cord_paper/figures/Supp_Fig_5_ctrl_poly_dotplot_individual.pdf", height = 21, width = 7)
# without labels for proper alignment
(p1 + p2 + plot_layout(guides = "collect")) /
(p3 + p4 + plot_layout(guides = "collect", design = layout)) &
theme(axis.text.x = element_blank(),
axis.text.y = element_blank())
# with labels to transfer in illustrator
(p1 + p2 + plot_layout(guides = "collect")) /
(p3 + p4 + plot_layout(guides = "collect", design = layout))
dev.off()
## png
## 2
Find Markers for clusters 11_ctrl, 16_ctrl, and 15_poly.
gnames <- modplots::gnames
markers <- list()
clu <- c("inhibitory_neurons_16_ctrl",
"excitatory neurons_11_ctrl",
"excitatory_neurons_15_poly")
for (i in seq(clu)) {
markers[[i]] <- FindMarkers(
my.se,
ident.1 = clu[i],
group.by = "annot_sample",
assay = "RNA",
verbose = FALSE,
only.pos = TRUE, # we look for overexpressed, specific markers
min.pct = 0.25,
logfc.threshold = 0.25,
latent.vars = c("CC.Difference.seurat"),
test.use = "MAST"
) %>%
tibble::rownames_to_column("Gene.stable.ID") %>%
dplyr::left_join(gnames, by = "Gene.stable.ID") %>%
dplyr::arrange(-avg_log2FC) %>%
dplyr::filter(p_val_adj < 0.05) %>%
dplyr::filter(abs(avg_log2FC) > 0.5) %>%
dplyr::mutate(delta_pct = abs(pct.1 - pct.2))
}
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
##
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
##
## Done!
names(markers) <- clu
Plot the top 50 markers for clusters 11_ctrl, 16_ctrl, and 15_poly.
n <- 50
mark_plot <- list()
for (i in seq(clu)) {
mark_plot[[i]] <- modplots::mDotPlot2(my.se,
group.by = "annot_sample",
# reverse order of markers so number one is on top
features = rev(markers[[i]][1:n,"Gene.stable.ID"]),
gnames = modplots::gnames) +
theme(axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5)) +
coord_flip() +
scale_colour_gradientn(colours = c("gray90","gray80","yellow", "orange", "red", "darkred", "darkred")) +
ggtitle(paste0("Top ", n, " markers by log2FC for ", clu[i]))
}
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
mark_plot[[1]]
mark_plot[[2]]
mark_plot[[3]]
pdf("~/spinal_cord_paper/figures/Sister_pair_neuron_marker_dotplots.pdf", width = 14, height = n/3)
mark_plot[[1]]
mark_plot[[2]]
mark_plot[[3]]
# Date and time of Rendering
Sys.time()
## [1] "2024-06-11 12:00:16 CEST"
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /scicore/soft/apps/OpenBLAS/0.3.1-GCC-7.3.0-2.30/lib/libopenblas_sandybridgep-r0.3.1.so
##
## locale:
## [1] en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.1.1 tibble_3.1.8 stringr_1.4.0 ggplot2_3.3.3
## [5] dplyr_1.0.10 SeuratObject_4.0.2 Seurat_4.0.5
##
## loaded via a namespace (and not attached):
## [1] plyr_1.8.6 igraph_1.2.6
## [3] lazyeval_0.2.2 sp_1.4-5
## [5] splines_4.1.0 listenv_0.8.0
## [7] scattermore_0.7 GenomeInfoDb_1.28.0
## [9] digest_0.6.27 htmltools_0.5.1.1
## [11] fansi_0.5.0 magrittr_2.0.1
## [13] memoise_2.0.0 tensor_1.5
## [15] cluster_2.1.2 ROCR_1.0-11
## [17] globals_0.16.2 Biostrings_2.60.0
## [19] matrixStats_0.58.0 modplots_1.0.0
## [21] spatstat.sparse_3.0-0 prettyunits_1.1.1
## [23] colorspace_2.0-1 blob_1.2.1
## [25] ggrepel_0.9.1 xfun_0.34
## [27] RCurl_1.98-1.3 crayon_1.4.1
## [29] jsonlite_1.7.2 spatstat.data_3.0-0
## [31] survival_3.2-11 zoo_1.8-9
## [33] glue_1.6.2 polyclip_1.10-0
## [35] gtable_0.3.0 zlibbioc_1.38.0
## [37] XVector_0.32.0 leiden_0.3.9
## [39] DelayedArray_0.18.0 SingleCellExperiment_1.14.1
## [41] future.apply_1.7.0 BiocGenerics_0.38.0
## [43] abind_1.4-5 scales_1.1.1
## [45] pheatmap_1.0.12 DBI_1.1.1
## [47] miniUI_0.1.1.1 Rcpp_1.0.7
## [49] progress_1.2.2 viridisLite_0.4.0
## [51] xtable_1.8-4 reticulate_1.22
## [53] spatstat.core_2.1-2 bit_4.0.4
## [55] stats4_4.1.0 htmlwidgets_1.5.3
## [57] httr_1.4.2 RColorBrewer_1.1-2
## [59] ellipsis_0.3.2 ica_1.0-2
## [61] pkgconfig_2.0.3 farver_2.1.0
## [63] sass_0.4.0 uwot_0.1.10
## [65] deldir_1.0-6 utf8_1.2.1
## [67] tidyselect_1.2.0 labeling_0.4.2
## [69] rlang_1.0.6 reshape2_1.4.4
## [71] later_1.2.0 AnnotationDbi_1.54.0
## [73] munsell_0.5.0 tools_4.1.0
## [75] cachem_1.0.5 cli_3.4.1
## [77] generics_0.1.3 RSQLite_2.2.7
## [79] ggridges_0.5.3 org.Gg.eg.db_3.13.0
## [81] evaluate_0.20 fastmap_1.1.0
## [83] yaml_2.2.1 goftest_1.2-2
## [85] knitr_1.41 bit64_4.0.5
## [87] fitdistrplus_1.1-6 purrr_0.3.4
## [89] RANN_2.6.1 KEGGREST_1.32.0
## [91] pbapply_1.4-3 future_1.30.0
## [93] nlme_3.1-152 mime_0.10
## [95] compiler_4.1.0 rstudioapi_0.13
## [97] plotly_4.10.0 png_0.1-7
## [99] spatstat.utils_3.0-1 bslib_0.2.5.1
## [101] stringi_1.6.2 highr_0.9
## [103] lattice_0.20-44 Matrix_1.3-3
## [105] vctrs_0.5.1 pillar_1.8.1
## [107] lifecycle_1.0.3 spatstat.geom_3.0-3
## [109] lmtest_0.9-38 jquerylib_0.1.4
## [111] RcppAnnoy_0.0.19 bitops_1.0-7
## [113] data.table_1.14.0 cowplot_1.1.1
## [115] irlba_2.3.3 GenomicRanges_1.44.0
## [117] httpuv_1.6.1 R6_2.5.0
## [119] promises_1.2.0.1 KernSmooth_2.23-20
## [121] gridExtra_2.3 IRanges_2.26.0
## [123] parallelly_1.33.0 codetools_0.2-18
## [125] MASS_7.3-54 assertthat_0.2.1
## [127] MAST_1.18.0 SummarizedExperiment_1.22.0
## [129] withr_2.4.2 sctransform_0.3.3
## [131] GenomeInfoDbData_1.2.6 S4Vectors_0.30.0
## [133] hms_1.1.0 mgcv_1.8-35
## [135] parallel_4.1.0 grid_4.1.0
## [137] rpart_4.1-15 tidyr_1.1.3
## [139] rmarkdown_2.17 MatrixGenerics_1.4.0
## [141] Rtsne_0.15 Biobase_2.52.0
## [143] shiny_1.6.0